program psi_main

 use mod_utils, only: &
   t_realtime, my_second

 use mod_messages, only: &
   mod_messages_constructor, &
   mod_messages_destructor,  &
   mod_messages_initialized, &
   error,   &
   warning, &
   info

 use mod_kinds, only: &
   mod_kinds_constructor, &
   mod_kinds_destructor,  &
   mod_kinds_initialized, &
   wp

 use mod_fu_manager, only: &
   mod_fu_manager_constructor, &
   mod_fu_manager_destructor,  &
   mod_fu_manager_initialized, &
   new_file_unit

 !$ use mod_omp_utils, only: &
 !$   detailed_timing_omp, &
 !$   ta_omp, tb_omp,      &
 !$   tc_omp, td_omp,      &
 !$   prefix_omp_out,      &
 !$   tform_omp,           &
 !$   omp_get_wtime

 use mod_octave_io, only: &
   mod_octave_io_constructor, &
   mod_octave_io_destructor,  &
   mod_octave_io_initialized, &
   write_octave, &
   read_octave

 use mod_sympoly, only: &
   mod_sympoly_constructor, &
   mod_sympoly_destructor,  &
   mod_sympoly_initialized, &
   show

 use mod_octave_io_sympoly, only: &
   mod_octave_io_sympoly_constructor, &
   mod_octave_io_sympoly_destructor,  &
   mod_octave_io_sympoly_initialized, &
   write_octave

 use mod_perms, only: &
   mod_perms_constructor, &
   mod_perms_destructor,  &
   mod_perms_initialized

 use mod_octave_io_perms, only: &
   mod_octave_io_perms_constructor, &
   mod_octave_io_perms_destructor,  &
   mod_octave_io_perms_initialized

 use mod_linal, only: &
   mod_linal_constructor, &
   mod_linal_destructor,  &
   mod_linal_initialized, &
   invmat_nop

 use mod_sparse, only: &
   mod_sparse_constructor, &
   mod_sparse_destructor,  &
   mod_sparse_initialized, &
   ! sparse types
   t_col,       &
   t_tri,       &
   ! construction of new objects
   new_col,     &
   new_tri,     &
   ! convertions
   col2tri,     &
   tri2col,     &
   tri2col_skeleton, &
   ! overloaded operators
   operator(+), &
   operator(*), &
   sum,         &
   transpose,   &
   matmul,      &
   diag,        &
   clear

 use mod_octave_io_sparse, only: &
   mod_octave_io_sparse_constructor, &
   mod_octave_io_sparse_destructor,  &
   mod_octave_io_sparse_initialized, &
   write_octave

 use mod_output_control, only: &
   mod_output_control_constructor, &
   mod_output_control_destructor,  &
   mod_output_control_initialized, &
   base_name

 use mod_master_el, only: &
   mod_master_el_constructor, &
   mod_master_el_destructor,  &
   mod_master_el_initialized

 use mod_grid, only: &
   mod_grid_constructor, &
   mod_grid_destructor,  &
   mod_grid_initialized!, &
   !t_grid,  &
   !grid

! use mod_cgdofs, only: &
!   mod_cgdofs_constructor, &
!   mod_cgdofs_destructor,  &
!   mod_cgdofs_initialized, &
!   ndofs,    &
!   dofs,     &
!   dofs_xy
!
! use mod_bcs, only: &
!   mod_bcs_constructor, &
!   mod_bcs_destructor,  &
!   mod_bcs_initialized, &
!   b_dir,          &
!   b_e2be,         &
!   t_b_v, b_vert,  &
!   p_t_b_v, b_v2bv,&
!   t_b_s, b_side,  &
!   p_t_b_s, b_s2bs

 use mod_base, only: &
   mod_base_constructor, &
   mod_base_destructor,  &
   mod_base_initialized, &
   t_base,       &
   write_octave

 use mod_fe_spaces, only: &
   mod_fe_spaces_constructor, &
   mod_fe_spaces_destructor,  &
   mod_fe_spaces_initialized, &
   dg_scal, & ! scalar f.e. space DG (orthonormal)
   tri_nodes, & ! nodes on the reference triangle
   lagr_scal  ! scalar Lagrangian basis

 use mod_testcases, only: &
   mod_testcases_constructor, &
   mod_testcases_destructor,  &
   mod_testcases_initialized, &
   ndim,        &
   test_name,   &
   test_description,&
!   coeff_diff,  &
!   coeff_adv,   &
   coeff_dir,   &
!   coeff_lam,   &
!   coeff_q,     &
   coeff_init

! use mod_psi_locmat, only: &
!   mod_psi_locmat_constructor, &
!   mod_psi_locmat_destructor,  &
!   mod_psi_locmat_initialized, &
!   lumped_mass, &
!   psi_locmat_nc

! use mod_error_norms, only: &
!   mod_error_norms_constructor, &
!   mod_error_norms_destructor,  &
!   mod_error_norms_initialized, &
!   primal_err, &
!   primal_flux_err

!-----------------------------------------------------------------------
 
 implicit none

!-----------------------------------------------------------------------
 
 ! Parameters
 integer, parameter :: max_char_len = 1000
 character(len=*), parameter :: input_file_name = 'psi.in'
 character(len=*), parameter :: this_prog_name = 'psi'
 character(len=*), parameter :: elapsed_format = '(A,F9.3,A)'

 ! Main variables
 logical :: write_grid, write_sys, compute_error_norms
 integer :: nbound, err_extra_deg
 integer, allocatable :: bc_type(:,:)
 real(wp), allocatable :: ref_nodes(:,:) ! nodes Lagrangian base
 character(len=max_char_len) :: grid_file, in_testname, in_basename, &
   cbc_type, adv_scheme
 type(t_base) :: base

 ! Local matrixes
 integer :: ie, il, jl, pos
 real(wp) :: aak(3,3) , fk(3), uuk(3)

 ! Global matrix
 integer, allocatable :: mmmi(:), mmmj(:), skel(:)
 real(wp), allocatable :: mass(:), uuu(:), fff(:)
 !$ real(wp), allocatable :: a_ax(:) ! to allow the reduction
 type(t_col) :: aaat, aaat1 ! we work with the transposed matrices

 ! bcs variables
 integer :: iv
 integer :: ndofs_dir, shift_int, shift_dir
 integer, allocatable :: gdl_dir(:), gdl_int(:)

 ! Time loop
 integer :: nstep, n
 real(wp) :: tt_sta, dt, tt_end, t_n, t_nm1
 integer :: n_out
 real(wp) :: dt_out, time_last_out

 ! error computations
 real(wp), allocatable :: &
   uuu_min_value(:), uuu_max_value(:), uuu_total_mass(:)
!! real(wp) :: e_u_l2, e_flux_l2
!! real(wp), allocatable :: uuu_dg(:)

 ! IO variables
 character(len=*), parameter :: out_file_nml_suff = '-nml.octxt'
 character(len=10000+len(out_file_nml_suff)) :: out_file_nml_name
 character(len=*), parameter :: out_file_base_suff = '-base.octxt'
 character(len=10000+len(out_file_base_suff)) :: out_file_base_name

 ! Auxiliary variables
 integer :: fu, ierr
 real(t_realtime) :: t00, t0, t1
 real(wp) :: courant
 character(len=10*max_char_len) :: message(4)
! type(t_bcs_error) :: b_err

 ! Input namelist
 namelist /input/ &
   ! test case
   in_testname, &
   ! output base name
   in_basename, &
   ! grid
   write_grid,  &
   grid_file,   &
   ! advection scheme
   adv_scheme,  &
   ! boundary conditions
   nbound, cbc_type, &
   ! time loop
   tt_sta, dt, tt_end, &
   dt_out,      &
   ! results
   write_sys,   &
   ! errors
   compute_error_norms, err_extra_deg

 !-----------------------------------------------------------------------
 ! Preliminary initializations
 t00 = my_second()
 call mod_messages_constructor()

 call mod_kinds_constructor()

 call mod_fu_manager_constructor()

 call mod_octave_io_constructor()

 call mod_sympoly_constructor()
 call mod_octave_io_sympoly_constructor()

 call mod_perms_constructor()
 call mod_octave_io_perms_constructor()

 call mod_linal_constructor()

 call mod_sparse_constructor()
 call mod_octave_io_sparse_constructor()
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Read input file
 call new_file_unit(fu,ierr)
 open(fu,file=trim(input_file_name), &
      status='old',action='read',form='formatted',iostat=ierr)
 read(fu,input,iostat=ierr)
 if(ierr.ne.0) call error(this_prog_name,"", &
   'Problems reading the "input" namelist.')
 close(fu,iostat=ierr)
 ! echo the input namelist
 out_file_nml_name = trim(in_basename)//out_file_nml_suff
 open(fu,file=trim(out_file_nml_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
 write(fu,input)
 close(fu,iostat=ierr)

 allocate(bc_type(nbound,2))
 read(cbc_type,*) bc_type
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! output control setup
 call mod_output_control_constructor(in_basename)
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Test case setup (including the definition of the dimension)
 call mod_testcases_constructor(in_testname)
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Define the finite element space
 t0 = my_second()

 call mod_base_constructor()

 call mod_fe_spaces_constructor()

 ! nodal Lagrangian basis
 allocate(ref_nodes(2,3)) ! avoid if realloc on assign.
 ref_nodes = tri_nodes(1)
 base = lagr_scal( ref_nodes , dg_scal(1) )

 ! write the octave output
 out_file_base_name = trim(base_name)//out_file_base_suff
 call new_file_unit(fu,ierr)
 open(fu,file=trim(out_file_base_name), &
      status='replace',action='write',form='formatted',iostat=ierr)
   call write_octave(base,'base',fu)
 close(fu,iostat=ierr)

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed FE basis construction: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------
 ! Construct the grid (including the dofs)
 t0 = my_second()

 call mod_master_el_constructor()

 call mod_grid_constructor(trim(grid_file),write_grid)

! call mod_cgdofs_constructor(ref_nodes,0,write_grid)

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Completed grid construction: elapsed time ',t1-t0,'s.'
 call info(this_prog_name,'',message(1))
 !-----------------------------------------------------------------------

! !-----------------------------------------------------------------------
! ! Collect the information concerning the boundary conditions
! call mod_bcs_constructor(bc_type)
! !-----------------------------------------------------------------------
!
! !-----------------------------------------------------------------------
! ! Precompute the mass matrix
! call mod_psi_locmat_constructor(trim(adv_scheme))
!
! call lumped_mass(mass)
! !-----------------------------------------------------------------------
!
! !-----------------------------------------------------------------------
! ! Set the initial condition
! allocate(uuu(ndofs))
! uuu = coeff_init(dofs_xy)
! !-----------------------------------------------------------------------
!
! !-----------------------------------------------------------------------
! ! Collect the bcs information: compute gdl_int and gdl_dir
! ndofs_dir = count(b_vert%bc.eq.b_dir) ! count the Dirichlet dofs
! allocate(gdl_dir(ndofs_dir))
! allocate(gdl_int(ndofs-ndofs_dir))
! ! fill in gdl_int, gdl_dir
! shift_int = 0
! shift_dir = 0
! do iv=1,grid%nv
!   if(.not.associated(b_v2bv(iv)%p)) then ! internal vertex
!     gdl_int(shift_int+1) = iv -1 ! nodes dofs are numbered as nodes
!     shift_int = shift_int+1
!   else ! boundary vertex
!     if(b_v2bv(iv)%p%bc.eq.b_dir) then
!       gdl_dir(shift_dir+1) = iv -1 ! nodes dofs are numbered as nodes
!       shift_dir = shift_dir+1
!     else
!       gdl_int(shift_int+1) = iv -1 ! nodes dofs are numbered as nodes
!       shift_int = shift_int+1
!     endif
!   endif
! enddo
! !-----------------------------------------------------------------------
!
! !-----------------------------------------------------------------------
! ! Enforce the Dirichlet bc
! do iv=1,ndofs_dir
!   uuu(gdl_dir(iv)+1:gdl_dir(iv)+1) =                  &
!     coeff_dir( reshape(grid%v(iv)%xy(:),(/ndim,1/)) , &
!                b_v2bv(gdl_dir(iv)+1)%p%breg )
! enddo
! !-----------------------------------------------------------------------
!
! !-----------------------------------------------------------------------
! ! Time integration
! !
! !   0        1                 n             nstep
! !   |--------|--------|--------|--------|------|
! ! tt_sta            t_nm1     t_n            tt_end
! !                     | step_n |
! !
! nstep = ceiling((tt_end-tt_sta)/dt)
! t_n = tt_sta
! time_last_out = 0.0_wp; n_out = 0
! allocate( uuu_min_value(0:nstep), uuu_max_value(0:nstep), &
!           uuu_total_mass(0:nstep) )
! call diagnostics(uuu_min_value(0),uuu_max_value(0),uuu_total_mass(0),&
!                  uuu, mass)
! ! various quantities still need to be computed, so we use zero values
! call write_step( 0, nstep, 0.0, 0.0_wp, 0,                       &
!   !$             0.0d0,                                          &
!   trim(in_basename), t_n, .false., aaat, (/0.0_wp/), (/0.0_wp/), &
!   uuu, uuu_min_value(0:0), uuu_max_value(0:0), uuu_total_mass(0:0) )
!
! allocate( mmmi(grid%ne*3**2) , mmmj(grid%ne*3**2) , fff(ndofs) )
! ! Build the matrix skeleton
! do ie=1,grid%ne
!   do il=1,3
!     do jl=1,3
!       pos = (ie-1)*3**2 + (il-1)*3 + jl
!       mmmi(pos) = dofs(il,ie) - 1
!       mmmj(pos) = dofs(jl,ie) - 1
!     enddo
!   enddo
! enddo
! call tri2col_skeleton(aaat,skel,new_tri(ndofs,ndofs,mmmj,mmmi,0.0_wp))
! !$ allocate( a_ax(aaat%nz) )
! deallocate( mmmi , mmmj )
!
! time_loop: do n=1,nstep
!
!   t0 = my_second()
!   !$ t0_omp = omp_get_wtime()
!
!   t_nm1 = t_n                  ! t^{n-1}
!   t_n = tt_sta + real(n,wp)*dt ! t^n
!   if(n.eq.nstep) then ! fix the last time step
!     t_n = tt_end
!     dt = t_n-t_nm1
!     time_last_out = huge(time_last_out)/100.0_wp
!   endif
!
!   fff = 0.0_wp
!   !$ if(.false.) &
!   aaat%ax = 0.0_wp
!   !$ a_ax = 0.0_wp
!
!   !$ if(detailed_timing_omp) ta_omp = omp_get_wtime()
!   !$omp parallel do schedule(static) &
!   !$omp          private(ie, uuk, aak, fk, pos, il, jl) &
!   !$omp          shared(grid, base, uuu, dofs, aaat, skel) &
!   !$omp          reduction( + : a_ax, fff) &
!   !$omp          default(none)
!   elem_loop: do ie=1,grid%ne
!     !---------------------------------------------------------------------
!     ! Matrix of the advection operator and rhs
!     uuk = uuu(dofs(:,ie))
!     call psi_locmat_nc( aak , fk , base , grid%e(ie) , uuk )
!     !---------------------------------------------------------------------
!
!     !---------------------------------------------------------------------
!     ! Node oriented summation of the element contributions
!     loc_test_do: do il=1,3
!       loc_shape_do: do jl=1,3
!         pos = (ie-1)*3**2 + (il-1)*3 + jl
!         !$ if(.false.) &
!         aaat%ax(skel(pos)) = aaat%ax(skel(pos)) + aak(il,jl)
!         !$ a_ax(skel(pos)) =    a_ax(skel(pos)) + aak(il,jl)
!       enddo loc_shape_do
!       fff(dofs(il,ie)) = fff(dofs(il,ie)) + fk(il)
!     enddo loc_test_do
!     !---------------------------------------------------------------------
!   enddo elem_loop
!   !$omp end parallel do
!   !$ if(detailed_timing_omp) then
!   !$   tb_omp = omp_get_wtime()
!   !$   write(*,tform_omp) prefix_omp_out//"matrix_loop" ,tb_omp-ta_omp
!   !$ endif
!
!   !$ if(detailed_timing_omp) ta_omp = omp_get_wtime()
!   !omp parallel workshare shared(aaat,a_ax)
!   !$ aaat%ax = a_ax
!   !omp end parallel workshare
!   !$ if(detailed_timing_omp) then
!   !$   tb_omp = omp_get_wtime()
!   !$   write(*,tform_omp) prefix_omp_out//"copy_back" ,tb_omp-ta_omp
!   !$ endif
!
!   !-----------------------------------------------------------------------
!   ! Partition aaat and the rhs according to the Dirichlet bcs
!   ! count the Dirichlet dofs
!   aaat1 = aaat * gdl_int
!   !-----------------------------------------------------------------------
!
!   !-----------------------------------------------------------------------
!   ! Enforce the Dirichlet bc
!   !$ if(detailed_timing_omp) ta_omp = omp_get_wtime()
!   !$omp parallel do schedule(static) &
!   !$omp          private(iv) &
!   !$omp          shared(ndofs_dir, gdl_dir, uuu, grid, ndim, b_v2bv ) &
!   !$omp          default(none)
!   do iv=1,ndofs_dir
!     uuu(gdl_dir(iv)+1:gdl_dir(iv)+1) =                  &
!       coeff_dir( reshape(grid%v(iv)%xy(:),(/ndim,1/)) , &
!                  b_v2bv(gdl_dir(iv)+1)%p%breg )
!   enddo
!   !$omp end parallel do
!   !$ if(detailed_timing_omp) then
!   !$   tb_omp = omp_get_wtime()
!   !$   write(*,tform_omp) prefix_omp_out//"bcs_loop" ,tb_omp-ta_omp
!   !$ endif
!   !-----------------------------------------------------------------------
!
!   !-----------------------------------------------------------------------
!   ! Time step
!   ! To improve efficiency it's better to work with aaa^T
!   !$ if(detailed_timing_omp) tc_omp = omp_get_wtime()
!   !omp parallel workshare
!   uuu(gdl_int+1) = uuu(gdl_int+1) - dt/mass(gdl_int+1)*matmul(uuu,aaat1)
!   !omp end parallel workshare
!   !$ if(detailed_timing_omp) then
!   !$   td_omp = omp_get_wtime()
!   !$   write(*,tform_omp) prefix_omp_out//"time_step" ,td_omp-tc_omp
!   !$ endif
!   !-----------------------------------------------------------------------
!
!   !-----------------------------------------------------------------------
!   ! Output
!   time_last_out = time_last_out + dt
!   call diagnostics(uuu_min_value(n),uuu_max_value(n),uuu_total_mass(n),&
!                    uuu, mass)
!   if(time_last_out.ge.dt_out) then
!     n_out = n_out+1
!
!     ! Terminal output
!     t1 = my_second()
!     !$ t1_omp = omp_get_wtime()
!
!     courant = dt*maxval(diag(aaat)/mass)
!     call write_step( n, nstep, t1-t0, courant, n_out,         &
!       !$ t1_omp-t0_omp,                                       &
!       trim(in_basename), t_n, write_sys, transpose(aaat),     &
!       mass, fff, uuu, uuu_min_value(0:n), uuu_max_value(0:n), &
!       uuu_total_mass(0:n) )
!
!     time_last_out = time_last_out - dt_out
!   endif
!   !$ if(detailed_timing_omp) then
!   !$   t1_omp = omp_get_wtime()
!   !$   write(*,tform_omp) prefix_omp_out//"time_loop",t1_omp-t0_omp
!   !$ endif
!   !-----------------------------------------------------------------------
! enddo time_loop
! !-----------------------------------------------------------------------
!
! !-----------------------------------------------------------------------
! ! Cleanup
! deallocate( gdl_int , gdl_dir )
! call clear( aaat )
! deallocate( skel , fff )
! !$ deallocate( a_ax )
! deallocate( uuu_min_value , uuu_max_value , uuu_total_mass )
! deallocate( mass , uuu )
! deallocate( bc_type )
!
! call mod_psi_locmat_destructor()
!
! call mod_bcs_destructor()
!
! call mod_cgdofs_destructor()

 call mod_grid_destructor()

 call mod_master_el_destructor()

 call mod_fe_spaces_destructor()

 call mod_base_destructor()

 call mod_testcases_destructor()

 call mod_output_control_destructor()

 call mod_octave_io_sparse_destructor()
 call mod_sparse_destructor()

 call mod_linal_destructor()

 call mod_octave_io_perms_destructor()
 call mod_perms_destructor()

 call mod_octave_io_sympoly_destructor()
 call mod_sympoly_destructor()

 call mod_octave_io_destructor()

 call mod_fu_manager_destructor()

 call mod_kinds_destructor()

 t1 = my_second()
 write(message(1),elapsed_format) &
   'Done: elapsed time ',t1-t00,'s.'
 call info(this_prog_name,'',message(1))

 call mod_messages_destructor()

 !-----------------------------------------------------------------------

contains

 !-----------------------------------------------------------------------

 pure subroutine diagnostics(umin,umax,mass,u,m)
  real(wp), intent(out) :: umin, umax, mass
  real(wp), intent(in) :: u(:), m(:)

   umin = minval(u)
   umax = maxval(u)
   mass = sum( u*m )

 end subroutine diagnostics

 !-----------------------------------------------------------------------

 subroutine write_step( n,nstep,wc_time,courant,n_out,   &
                        !$ wc_time_omp,                  &
                        basename,t_n,write_sys,a,mass,f, &
                        u,u_min,u_max,u_mass )
  logical, intent(in) :: write_sys
  integer, intent(in) :: n, nstep, n_out
  real(t_realtime), intent(in) :: wc_time
  !$ double precision :: wc_time_omp
  real(wp), intent(in) :: courant, t_n, mass(:), f(:), u(:), &
    u_min(:), u_max(:), u_mass(:)
  character(len=*), intent(in) :: basename
  type(t_col) :: a

  integer :: fu, ierr

  character(len=*), parameter :: time_stamp_format = '(i4.4)'
  character(len=4) :: time_stamp
  character(len=*), parameter :: suff1 = '-res-'
  character(len=*), parameter :: suff2 = '.octxt'
  character(len= len_trim(basename) + len(suff1) + 4 + len(suff2)) :: &
     out_file_res_name

   ! Terminal output
   write(message(1),'(a,i6,a,i6,a)') &
     'Time step ',n,' of ',nstep,'; elapsed time '
   write(message(1),elapsed_format) trim(message(1)),wc_time,'s.'
   write(message(2),'(a,f6.3,a)') 'Courant number ',courant,'.'
   !$ write(message(3),elapsed_format) &
   !$   'OpenMP elapsed time ',wc_time_omp,'s.'
   call info(this_prog_name,'',message(1:2   &
   !$                                     +1 &
                                            ))

   ! File output
   call new_file_unit(fu,ierr)
   write(time_stamp,time_stamp_format) n_out
   out_file_res_name = trim(basename)//suff1//time_stamp//suff2
   open(fu,file=trim(out_file_res_name), &
        status='replace',action='write',form='formatted',iostat=ierr)
     ! preamble
     call write_octave(test_name       ,'test_name'       ,fu)
     call write_octave(test_description,'test_description',fu)
     call write_octave(t_n,'time',fu)
     call write_octave(courant,'C',fu)
     ! data
     if(write_sys) then
       call write_octave(gdl_int,'c','gdl_int',fu)
       call write_octave(gdl_dir,'c','gdl_dir',fu)
       call write_octave(a,'aaa',fu)
       call write_octave(mass,'c','mass',fu)
       call write_octave(f,'c','fff',fu)
     endif
     call write_octave(u,'c','uuu',fu)
     call write_octave(u_min ,'c','u_min' ,fu)
     call write_octave(u_max ,'c','u_max' ,fu)
     call write_octave(u_mass,'c','u_mass',fu)
!       if(compute_error_norms) then
!         call write_octave(e_u_l2,'e_u_l2',fu)
!         call write_octave(e_flux_l2,'e_flux_l2',fu)
!       endif
     close(fu,iostat=ierr)

 end subroutine write_step

 !-----------------------------------------------------------------------

end program psi_main

